###########################################################################################################
# Replication for correlations presented in Table A6 (Weimar electoral outcomes)
# Charnysh and Schaub, Migration and Social Change: Evidence from post-WWII Displacement in Germany, JOP
# R version 4.3.1 (2023-06-16) 
###########################################################################################################

rm(list = ls())
library(openxlsx)
library(dplyr)
library(xtable)

dat_el<-read.xlsx("county_data/Weimar_elections.xlsx")

##Manually correct population for Landkreis Münster or use NA (typo in the book)

##Religious composition in 1939
# Denominational shares of the 1939 population. "Other" is the residual left
# after subtracting the Evangelische and Katholische counts from the total.
pop1939 <- dat_el$Population1939
dat_el$scatholics1939 <- dat_el$Katholische1939 / pop1939
dat_el$sprotestants1939 <- dat_el$Evangelische1939 / pop1939
dat_el$sother1939 <-
  (pop1939 - dat_el$Evangelische1939 - dat_el$Katholische1939) / pop1939
# Fractionalization index: one minus the sum of squared group shares.
dat_el$relfrac1939 <- 1 - (
  dat_el$scatholics1939^2 +
    dat_el$sprotestants1939^2 +
    dat_el$sother1939^2
)

##Religious composition in 1950:
# Denominational shares of the 1950 population. "Other" is the residual left
# after subtracting the Evang1950 and Kath1950 counts from Pop1950.
dat_el$scatholics1950 <- dat_el$Kath1950 / dat_el$Pop1950
dat_el$sprotestants1950 <- dat_el$Evang1950 / dat_el$Pop1950
dat_el$sother1950 <-
  (dat_el$Pop1950 - dat_el$Evang1950 - dat_el$Kath1950) / dat_el$Pop1950

# A negative residual share is impossible, so it is replaced with NA.
# Landkreis Münster: total population is too small; there is a discrepancy in
# various statistical publications. which() skips rows that are already NA.
# (This statement was previously repeated three times; once is sufficient.)
dat_el$sother1950[which(dat_el$sother1950 < 0)] <- NA

# Fractionalization index: one minus the sum of squared group shares.
# An NA in sother1950 propagates to relfrac1950 and on to DeltaFrac.
dat_el$relfrac1950 <- 1 - (
  dat_el$scatholics1950^2 +
    dat_el$sprotestants1950^2 +
    dat_el$sother1950^2
)

# Main explanatory variable: change in fractionalization from 1939 to 1950.
dat_el$DeltaFrac <- dat_el$relfrac1950 - dat_el$relfrac1939

## Electoral outcomes from the Weimar period:

# 1928 election: each party column expressed as a share of n285wb.
# NOTE(review): what n285wb counts is not shown here; confirm in the codebook.
base_1928 <- dat_el$n285wb
dat_el$SPD1928 <- dat_el$n285spd / base_1928
dat_el$DNVP1928 <- dat_el$n285dnvp / base_1928
dat_el$ZENTRUM1928 <- dat_el$n285zx / base_1928
dat_el$DDP1928 <- dat_el$n285ddpx / base_1928

# 1930 election: each party column expressed as a share of n309wb.
base_1930 <- dat_el$n309wb
dat_el$SPD1930 <- dat_el$n309spd / base_1930
dat_el$DNVP1930 <- dat_el$n309dnvp / base_1930
dat_el$ZENTRUM1930 <- dat_el$n309zx / base_1930
dat_el$DSTP1930 <- dat_el$n309dstp / base_1930

# July 1932 election: each party column expressed as a share of n327wb.
base_1932j <- dat_el$n327wb
dat_el$SPD1932j <- dat_el$n327spd / base_1932j
dat_el$DNVP1932j <- dat_el$n327dnvp / base_1932j
dat_el$ZENTRUM1932j <- dat_el$n327zx / base_1932j
dat_el$DSTP1932j <- dat_el$n327dstp / base_1932j

# November 1932 election: each party column expressed as a share of n32nwb.
dat_el$SPD1932n <- dat_el$n32nspd / dat_el$n32nwb
dat_el$DNVP1932n <- dat_el$n32ndnvp / dat_el$n32nwb
dat_el$ZENTRUM1932n <- dat_el$n32nzx / dat_el$n32nwb
# Fixed: the DStP share was divided by the July denominator (n327wb); it now
# uses the November denominator like the other November shares.
dat_el$DSTP1932n <- dat_el$n32ndstp / dat_el$n32nwb

# 1933 election: only the SPD and Zentrum shares are computed, over n333wb.
base_1933 <- dat_el$n333wb
dat_el$SPD1933 <- dat_el$n333spd / base_1933
dat_el$ZENTRUM1933 <- dat_el$n333zx / base_1933

## Table A6: Correlations and R2 between Delta diversity and electoral outcomes at the county level. 

# Outcome columns (names) and the row labels printed for them (values), in
# table order. Keeping both in one vector ties every label to its variable.
outcome_labels <- c(
  SPD1928 = "SPD 1928",
  ZENTRUM1928 = "Zentrum 1928",
  DNVP1928 = "DNVP 1928",
  DDP1928 = "DDP 1928",
  SPD1930 = "SPD 1930",
  ZENTRUM1930 = "Zentrum 1930",
  DNVP1930 = "DNVP 1930",
  DSTP1930 = "DSTP 1930",
  SPD1932j = "SPD 1932 (July)",
  ZENTRUM1932j = "Zentrum 1932 (July)",
  DNVP1932j = "DNVP 1932 (July)",
  DSTP1932j = "DSTP 1932 (July)",
  SPD1932n = "SPD 1932 (November)",
  ZENTRUM1932n = "Zentrum 1932 (November)",
  DNVP1932n = "DNVP 1932 (November)",
  DSTP1932n = "DSTP 1932 (November)",
  SPD1933 = "SPD 1933",
  ZENTRUM1933 = "Zentrum 1933"
)

# Association between DeltaFrac and one outcome column.
#   outcome: name of a column of `data` (character scalar).
#   data:    data frame holding `DeltaFrac` and the outcome column.
# Returns c(Pearson correlation, R^2 of lm(DeltaFrac ~ outcome)).
delta_frac_association <- function(outcome, data) {
  pearson <- unname(cor.test(data$DeltaFrac, data[[outcome]])$estimate)
  fit <- lm(reformulate(outcome, response = "DeltaFrac"), data = data)
  c(pearson, summary(fit)$r.squared)
}

# vapply returns a 2 x 18 matrix (one column per outcome); transpose it so
# that each outcome becomes a table row.
assoc <- vapply(
  names(outcome_labels),
  delta_frac_association,
  numeric(2),
  data = dat_el
)
corrs <- as.data.frame(t(assoc), row.names = unname(outcome_labels))
# The header typo "Propotion" is corrected here. The former `col.names`
# argument to as.data.frame() was dropped: it had no effect on a matrix.
names(corrs) <- c(
  "Pearson Correlation",
  "Proportion of explained variance (R^2)"
)

xtable(corrs, digits = 3)

